import csv
import random
import re
import time
import winsound

import openpyxl
import parsel
import pyautogui
import selenium
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# from sklearn.utils.multiclass import type_of_target
# print(type_of_target(y_test))



# Start-up timestamp; the script later uses it as the CSV file name.
file_t = time.time()

# Chrome is launched from an explicit binary with a chromedriver found next to
# the script (or on PATH). Alternative locations used on other machines:
# options.binary_location = r'E:\Google\Chrome\Application\chrome.exe'
# chrome_driver_binary = r'C:\Users\Administrator\AppData\Local\Programs\Python\Python37\chromedriver.exe'
options = webdriver.ChromeOptions()
options.binary_location = r'F:\chrome\Chrome-bin\chrome.exe'
chrome_driver_binary = r'chromedriver.exe'
# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium 4 releases expect a Service object - confirm the
# installed version before changing this line.
driver = webdriver.Chrome(chrome_driver_binary, options=options)

# wb = openpyxl.load_workbook('d:/hc/newyes.xlsx')
# ws = wb.active
driver.get('https://weibo.com/')

# Make `navigator.webdriver` report false in the loaded page.
# NOTE(review): this patches only the document that is already loaded; pages
# opened afterwards presumably see the default value again - confirm.
script = 'Object.defineProperty(navigator,"webdriver",{get: () => false,});'
driver.execute_script(script)

# Maximize the window, then give the landing page time to render.
driver.maximize_window()
time.sleep(11)

# Click through the two login-panel elements (presumably switching the panel
# to QR-code login - TODO confirm against the page markup).
driver.find_element(By.CSS_SELECTOR, '.qrcode_phone').click()
time.sleep(2)
driver.find_element(By.CSS_SELECTOR, '.qrcode_target').click()

# Wait up to 60 s for `.gn_name` to become clickable (presumably it appears
# once the operator has completed the login by hand).
WebDriverWait(driver, 60, 0.5).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, '.gn_name')))
time.sleep(2)

# Type the search keyword into the search box and submit it. The wait returns
# the located element, so it does not need to be looked up a second time.
keyword_input = WebDriverWait(driver, 10, 0.5).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, '.W_input')))
keyword_input.send_keys('武汉肺炎')
driver.find_element(By.CSS_SELECTOR, '.ficon_search').click()


# Pause so the operator can configure the advanced-search options by hand.
time.sleep(40)

# Continue in the most recently opened window.
driver.switch_to.window(driver.window_handles[-1])
time.sleep(3)

# Count the `.list ul li` entries; the page loop below runs once per entry.
number = len(driver.find_elements(By.CSS_SELECTOR, '.list ul li'))
print(number,'number+')
def _parse_cards(page_source):
    """Return the `.card-wrap` selectors found in *page_source*.

    The last three matches are dropped (presumably they are not posts -
    TODO confirm against the page markup).
    """
    return parsel.Selector(page_source).css('.card-wrap')[:-3]


def _join_stripped(texts):
    """Strip every fragment in *texts* and concatenate the results."""
    return ''.join(text.strip() for text in texts)


def _user_title(card):
    """Classify the author of *card* by the verification badge it shows."""
    if card.css('a[title="微博官方认证"]'):
        return "蓝V"
    if card.css('a[title="微博个人认证"]'):
        return "黄V"
    return '普通用户'


def _post_time(card):
    """Return the link texts of the first `.from` node, or 'None' if absent."""
    from_nodes = card.css('.from')
    if not from_nodes:
        return 'None'
    return _join_stripped(from_nodes[0].css('a::text').getall())


def _card_actions(card):
    """Return the concatenated `.card-act` item texts, or 'None' if absent.

    The literal '赞' is inserted in front of the text of the last item.
    """
    items = card.css('.card-act li')
    if not items:
        return 'None'
    parts = []
    for item in items[:-1]:
        parts.extend(text.strip() for text in item.css('::text').getall())
        print(parts,'card_act_list+')
    parts.append('赞')
    parts.extend(text.strip() for text in items[-1].css('::text').getall())
    return ''.join(parts)


def _profile_place(card_index, title):
    """Open the author profile of result card *card_index* and read a detail.

    Returns the stripped text of a `.ul_detail span.item_text` entry (the
    first one for '黄V' authors, the second one otherwise), or 'None' when
    the profile cannot be opened or the entry is missing. The profile window
    is always closed again and focus returns to the window that was active
    when the function was called.
    """
    results_handle = driver.current_window_handle
    known_handles = driver.window_handles

    live_cards = driver.find_elements(By.CSS_SELECTOR, '.card-wrap')
    if card_index >= len(live_cards):
        return 'None'
    name_links = live_cards[card_index].find_elements(
        By.CSS_SELECTOR, '.info>div>.name')
    if not name_links:
        return 'None'

    try:
        name_links[0].click()
    except WebDriverException:
        # The native click failed; trigger the click from JavaScript instead.
        driver.execute_script("arguments[0].click();", name_links[0])

    # Only proceed when the click really opened another window. Closing the
    # "last" handle unconditionally would close the results window itself.
    try:
        WebDriverWait(driver, 10, 0.5).until(
            EC.new_window_is_opened(known_handles))
    except WebDriverException:
        return 'None'
    opened = [h for h in driver.window_handles if h not in known_handles]
    if not opened:
        return 'None'

    driver.switch_to.window(opened[-1])
    try:
        # Scroll down a little before waiting for the detail list.
        driver.execute_script('window.scrollBy(0,200)')
        WebDriverWait(driver, 60, 1).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '.ul_detail')))
        time.sleep(3)
        profile = parsel.Selector(driver.page_source)
        time.sleep(2)
        # Pick the index up front so a single-entry list still works for '黄V'.
        wanted = 0 if title == '黄V' else 1
        entry = profile.css('.ul_detail span.item_text')[wanted]
        place = entry.css('::text').get().strip()
    except (WebDriverException, IndexError, AttributeError):
        # Timeout, missing entry, or an entry without text.
        place = 'None'
    finally:
        driver.close()
        driver.switch_to.window(results_handle)
    return place


for page_index in range(number):
    # driver.get('https://s.weibo.com/weibo?q=5G&scope=ori&suball=1&timescope=custom:2019-10-01:2020-03-31&Refer=SWeibo_box')
    WebDriverWait(driver, 10, 0.5).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '.card-wrap')))
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    time.sleep(2)
    page_no = page_index + 1
    card_wrap = _parse_cards(driver.page_source)

    # Anti-crawling "page not found" responses show up as cards without a
    # `.txt` node; refreshing the page is the workaround.
    if any(not card.css('.txt') for card in card_wrap):
        for _ in range(3):
            winsound.Beep(2222, 111)
        driver.refresh()
        time.sleep(20)
        driver.switch_to.window(driver.window_handles[-1])
        card_wrap = _parse_cards(driver.page_source)

    card_list = []
    for card_num, card in enumerate(card_wrap):
        txt_nodes = card.css('.txt')
        if not txt_nodes:
            # Still no text after the refresh: skip this card instead of
            # aborting the whole run with an IndexError.
            print('skip card without .txt', card_num)
            continue

        name = str(card.css('.name::text').get())
        print(name,'name+')

        title = _user_title(card)
        print(title, 'title+')

        txt = ''.join(txt_nodes[-1].css('::text').getall()).strip()
        print(type(txt))
        print(txt,'txt+')

        txt_href = str(txt_nodes[-1].css('a::attr(href)').getall())
        print(txt_href,'txt_href+')

        timet = _post_time(card)
        print(timet,'time+')

        card_act = _card_actions(card)
        print(card_act,'card_act+')

        # Media lookups never raise; a missing node just yields an empty list.
        media = card.css('.media')
        click_href = str(media.css('a::attr(href)').getall())
        print(click_href,'click_href+')
        picture = str(media.css('img::attr(src)').getall())
        print(picture,'picture+')
        video = str(media.css('video::attr(src)').getall())
        print(video,'video+')

        # `card_num` indexes the live DOM the same way it indexes the parsed
        # cards, because the parsed list only drops entries from the end.
        place = _profile_place(card_num, title)
        print(place, 'place_f')

        # The value is discarded for '蓝V' authors and when it contains '简介'.
        if title == '蓝V' or '简介' in place:
            place = 'None'
        card_list.append([name,title,place,txt,card_act,timet])

        # ws.append(card_list)
        print(card_list,'card_list')
        print(type(card_list),'card_list+')

    # Note: if Chinese text shows up garbled, consider gb18030 instead.
    with open(str(file_t)+'.csv',mode='a',newline='',encoding='utf_8_sig') as f:
        csv.writer(f,dialect='excel').writerows(card_list)

    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    time.sleep(4)
    try:
        # Hard-coded screen coordinates (presumably the "next page" control on
        # the author's display - TODO confirm); the first page uses a
        # different position than the later ones.
        click_x, click_y = (803, 883) if page_no == 1 else (903, 882)
        pyautogui.moveTo(click_x, click_y, duration=1)
        pyautogui.click()
        print('111111')
    except Exception:
        # Mouse automation failed: click the pagination entry via JavaScript.
        pager_items = driver.find_elements(By.CSS_SELECTOR, '.list li')
        if page_no < len(pager_items):
            driver.execute_script("arguments[0].click();", pager_items[page_no])
        print('222222')

    # Random pause between pages.
    sleep_time = random.randint(10,17)
    time.sleep(sleep_time)
    print('爬完' + str(page_no) + '页')
